Load the dataset into pandas¶
In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
Display the first few rows¶
In [18]:
# Read the 2015 happiness CSV into `df15` and preview its first five rows.
# NOTE(review): the path is an absolute location on one machine; the notebook
# cannot be re-run elsewhere until it points at a local copy of the data.
csv_path_2015 = 'C:/Users/san/OneDrive/Desktop/WorldHappinessData/2015.csv'
df15 = pd.read_csv(csv_path_2015)
df15.head(n=5)
Out[18]:
| Country | Region | Happiness Rank | Happiness Score | Standard Error | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Switzerland | Western Europe | 1 | 7.587 | 0.03411 | 1.39651 | 1.34951 | 0.94143 | 0.66557 | 0.41978 | 0.29678 | 2.51738 |
| 1 | Iceland | Western Europe | 2 | 7.561 | 0.04884 | 1.30232 | 1.40223 | 0.94784 | 0.62877 | 0.14145 | 0.43630 | 2.70201 |
| 2 | Denmark | Western Europe | 3 | 7.527 | 0.03328 | 1.32548 | 1.36058 | 0.87464 | 0.64938 | 0.48357 | 0.34139 | 2.49204 |
| 3 | Norway | Western Europe | 4 | 7.522 | 0.03880 | 1.45900 | 1.33095 | 0.88521 | 0.66973 | 0.36503 | 0.34699 | 2.46531 |
| 4 | Canada | North America | 5 | 7.427 | 0.03553 | 1.32629 | 1.32261 | 0.90563 | 0.63297 | 0.32957 | 0.45811 | 2.45176 |
In [11]:
# Read the 2016 happiness CSV into `df16` and preview its first five rows.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
csv_path_2016 = 'C:/Users/san/OneDrive/Desktop/WorldHappinessData/2016.csv'
df16 = pd.read_csv(csv_path_2016)
df16.head(n=5)
Out[11]:
| Country | Region | Happiness Rank | Happiness Score | Lower Confidence Interval | Upper Confidence Interval | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Denmark | Western Europe | 1 | 7.526 | 7.460 | 7.592 | 1.44178 | 1.16374 | 0.79504 | 0.57941 | 0.44453 | 0.36171 | 2.73939 |
| 1 | Switzerland | Western Europe | 2 | 7.509 | 7.428 | 7.590 | 1.52733 | 1.14524 | 0.86303 | 0.58557 | 0.41203 | 0.28083 | 2.69463 |
| 2 | Iceland | Western Europe | 3 | 7.501 | 7.333 | 7.669 | 1.42666 | 1.18326 | 0.86733 | 0.56624 | 0.14975 | 0.47678 | 2.83137 |
| 3 | Norway | Western Europe | 4 | 7.498 | 7.421 | 7.575 | 1.57744 | 1.12690 | 0.79579 | 0.59609 | 0.35776 | 0.37895 | 2.66465 |
| 4 | Finland | Western Europe | 5 | 7.413 | 7.351 | 7.475 | 1.40598 | 1.13464 | 0.81091 | 0.57104 | 0.41004 | 0.25492 | 2.82596 |
In [12]:
# Read the 2017 happiness CSV into `df17` and preview its first five rows.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
csv_path_2017 = 'C:/Users/san/OneDrive/Desktop/WorldHappinessData/2017.csv'
df17 = pd.read_csv(csv_path_2017)
df17.head(n=5)
Out[12]:
| Region | Country | Happiness.Rank | Happiness.Score | Whisker.high | Whisker.low | Economy..GDP.per.Capita. | Family | Health..Life.Expectancy. | Freedom | Generosity | Trust..Government.Corruption. | Dystopia.Residual | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Western Europe | Norway | 1 | 7.537 | 7.594445 | 7.479556 | 1.616463 | 1.533524 | 0.796667 | 0.635423 | 0.362012 | 0.315964 | 2.277027 |
| 1 | Western Europe | Denmark | 2 | 7.522 | 7.581728 | 7.462272 | 1.482383 | 1.551122 | 0.792566 | 0.626007 | 0.355280 | 0.400770 | 2.313707 |
| 2 | Western Europe | Iceland | 3 | 7.504 | 7.622030 | 7.385970 | 1.480633 | 1.610574 | 0.833552 | 0.627163 | 0.475540 | 0.153527 | 2.322715 |
| 3 | Western Europe | Switzerland | 4 | 7.494 | 7.561772 | 7.426227 | 1.564980 | 1.516912 | 0.858131 | 0.620071 | 0.290549 | 0.367007 | 2.276716 |
| 4 | Western Europe | Finland | 5 | 7.469 | 7.527542 | 7.410458 | 1.443572 | 1.540247 | 0.809158 | 0.617951 | 0.245483 | 0.382612 | 2.430182 |
In [13]:
# Read the 2018 happiness CSV into `df18` and preview its first five rows.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
csv_path_2018 = 'C:/Users/san/OneDrive/Desktop/WorldHappinessData/2018.csv'
df18 = pd.read_csv(csv_path_2018)
df18.head(n=5)
Out[13]:
| Overall rank | Country or region | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Finland | 7.632 | 1.305 | 1.592 | 0.874 | 0.681 | 0.202 | 0.393 |
| 1 | 2 | Norway | 7.594 | 1.456 | 1.582 | 0.861 | 0.686 | 0.286 | 0.340 |
| 2 | 3 | Denmark | 7.555 | 1.351 | 1.590 | 0.868 | 0.683 | 0.284 | 0.408 |
| 3 | 4 | Iceland | 7.495 | 1.343 | 1.644 | 0.914 | 0.677 | 0.353 | 0.138 |
| 4 | 5 | Switzerland | 7.487 | 1.420 | 1.549 | 0.927 | 0.660 | 0.256 | 0.357 |
In [14]:
# Read the 2019 happiness CSV into `df19` and preview its first five rows.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
csv_path_2019 = 'C:/Users/san/OneDrive/Desktop/WorldHappinessData/2019.csv'
df19 = pd.read_csv(csv_path_2019)
df19.head(n=5)
Out[14]:
| Overall rank | Country or region | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Finland | 7.769 | 1.340 | 1.587 | 0.986 | 0.596 | 0.153 | 0.393 |
| 1 | 2 | Denmark | 7.600 | 1.383 | 1.573 | 0.996 | 0.592 | 0.252 | 0.410 |
| 2 | 3 | Norway | 7.554 | 1.488 | 1.582 | 1.028 | 0.603 | 0.271 | 0.341 |
| 3 | 4 | Iceland | 7.494 | 1.380 | 1.624 | 1.026 | 0.591 | 0.354 | 0.118 |
| 4 | 5 | Netherlands | 7.488 | 1.396 | 1.522 | 0.999 | 0.557 | 0.322 | 0.298 |
The data types and summary statistics¶
In [64]:
# Column names, dtypes and non-null counts for the 2015 table.
df15.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 158 entries, 0 to 157 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 158 non-null object 1 Region 158 non-null object 2 Happiness Rank 158 non-null int64 3 Happiness Score 158 non-null float64 4 Standard Error 158 non-null float64 5 Economy (GDP per Capita) 158 non-null float64 6 Family 158 non-null float64 7 Health (Life Expectancy) 158 non-null float64 8 Freedom 158 non-null float64 9 Trust (Government Corruption) 158 non-null float64 10 Generosity 158 non-null float64 11 Dystopia Residual 158 non-null float64 dtypes: float64(9), int64(1), object(2) memory usage: 14.9+ KB
In [23]:
# Column names, dtypes and non-null counts for the 2016 table.
df16.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 157 entries, 0 to 156 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 157 non-null object 1 Region 157 non-null object 2 Happiness Rank 157 non-null int64 3 Happiness Score 157 non-null float64 4 Lower Confidence Interval 157 non-null float64 5 Upper Confidence Interval 157 non-null float64 6 Economy (GDP per Capita) 157 non-null float64 7 Family 157 non-null float64 8 Health (Life Expectancy) 157 non-null float64 9 Freedom 157 non-null float64 10 Trust (Government Corruption) 157 non-null float64 11 Generosity 157 non-null float64 12 Dystopia Residual 157 non-null float64 dtypes: float64(10), int64(1), object(2) memory usage: 16.1+ KB
In [24]:
# Column names, dtypes and non-null counts for the 2017 table.
# NOTE(review): the saved output below lists 12 columns and no 'Region', while
# the df17 preview and the df17 null-count cell both show a 'Region' column.
# The cells were evidently run against different versions of df17; re-run the
# notebook from a fresh kernel so the outputs agree.
df17.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 155 entries, 0 to 154 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 155 non-null object 1 Happiness.Rank 155 non-null int64 2 Happiness.Score 155 non-null float64 3 Whisker.high 155 non-null float64 4 Whisker.low 155 non-null float64 5 Economy..GDP.per.Capita. 155 non-null float64 6 Family 155 non-null float64 7 Health..Life.Expectancy. 155 non-null float64 8 Freedom 155 non-null float64 9 Generosity 155 non-null float64 10 Trust..Government.Corruption. 155 non-null float64 11 Dystopia.Residual 155 non-null float64 dtypes: float64(10), int64(1), object(1) memory usage: 14.7+ KB
In [25]:
# Column names, dtypes and non-null counts for the 2018 table.
df18.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 156 entries, 0 to 155 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Overall rank 156 non-null int64 1 Country or region 156 non-null object 2 Score 156 non-null float64 3 GDP per capita 156 non-null float64 4 Social support 156 non-null float64 5 Healthy life expectancy 156 non-null float64 6 Freedom to make life choices 156 non-null float64 7 Generosity 156 non-null float64 8 Perceptions of corruption 155 non-null float64 dtypes: float64(7), int64(1), object(1) memory usage: 11.1+ KB
In [26]:
# Column names, dtypes and non-null counts for the 2019 table.
df19.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 156 entries, 0 to 155 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Overall rank 156 non-null int64 1 Country or region 156 non-null object 2 Score 156 non-null float64 3 GDP per capita 156 non-null float64 4 Social support 156 non-null float64 5 Healthy life expectancy 156 non-null float64 6 Freedom to make life choices 156 non-null float64 7 Generosity 156 non-null float64 8 Perceptions of corruption 156 non-null float64 dtypes: float64(7), int64(1), object(1) memory usage: 11.1+ KB
In [27]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric 2015 columns.
# NOTE(review): the saved output below shows 'Overall rank' / 'Score' columns with
# 156 rows, which df15 does not have — it matches the 2019 summary and looks stale;
# re-run this cell.
df15.describe()
Out[27]:
| Overall rank | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
|---|---|---|---|---|---|---|---|---|
| count | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 |
| mean | 78.500000 | 5.407096 | 0.905147 | 1.208814 | 0.725244 | 0.392571 | 0.184846 | 0.110603 |
| std | 45.177428 | 1.113120 | 0.398389 | 0.299191 | 0.242124 | 0.143289 | 0.095254 | 0.094538 |
| min | 1.000000 | 2.853000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 39.750000 | 4.544500 | 0.602750 | 1.055750 | 0.547750 | 0.308000 | 0.108750 | 0.047000 |
| 50% | 78.500000 | 5.379500 | 0.960000 | 1.271500 | 0.789000 | 0.417000 | 0.177500 | 0.085500 |
| 75% | 117.250000 | 6.184500 | 1.232500 | 1.452500 | 0.881750 | 0.507250 | 0.248250 | 0.141250 |
| max | 156.000000 | 7.769000 | 1.684000 | 1.624000 | 1.141000 | 0.631000 | 0.566000 | 0.453000 |
In [28]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric 2016 columns.
df16.describe()
Out[28]:
| Happiness Rank | Happiness Score | Lower Confidence Interval | Upper Confidence Interval | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 | 157.000000 |
| mean | 78.980892 | 5.382185 | 5.282395 | 5.481975 | 0.953880 | 0.793621 | 0.557619 | 0.370994 | 0.137624 | 0.242635 | 2.325807 |
| std | 45.466030 | 1.141674 | 1.148043 | 1.136493 | 0.412595 | 0.266706 | 0.229349 | 0.145507 | 0.111038 | 0.133756 | 0.542220 |
| min | 1.000000 | 2.905000 | 2.732000 | 3.078000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.817890 |
| 25% | 40.000000 | 4.404000 | 4.327000 | 4.465000 | 0.670240 | 0.641840 | 0.382910 | 0.257480 | 0.061260 | 0.154570 | 2.031710 |
| 50% | 79.000000 | 5.314000 | 5.237000 | 5.419000 | 1.027800 | 0.841420 | 0.596590 | 0.397470 | 0.105470 | 0.222450 | 2.290740 |
| 75% | 118.000000 | 6.269000 | 6.154000 | 6.434000 | 1.279640 | 1.021520 | 0.729930 | 0.484530 | 0.175540 | 0.311850 | 2.664650 |
| max | 157.000000 | 7.526000 | 7.460000 | 7.669000 | 1.824270 | 1.183260 | 0.952770 | 0.608480 | 0.505210 | 0.819710 | 3.837720 |
In [95]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric 2017 columns.
df17.describe()
Out[95]:
| Happiness.Rank | Happiness.Score | Whisker.high | Whisker.low | Economy..GDP.per.Capita. | Family | Health..Life.Expectancy. | Freedom | Generosity | Trust..Government.Corruption. | Dystopia.Residual | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 | 155.000000 |
| mean | 78.000000 | 5.354019 | 5.452326 | 5.255713 | 0.984718 | 1.188898 | 0.551341 | 0.408786 | 0.246883 | 0.123120 | 1.850238 |
| std | 44.888751 | 1.131230 | 1.118542 | 1.145030 | 0.420793 | 0.287263 | 0.237073 | 0.149997 | 0.134780 | 0.101661 | 0.500028 |
| min | 1.000000 | 2.693000 | 2.864884 | 2.521116 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.377914 |
| 25% | 39.500000 | 4.505500 | 4.608172 | 4.374955 | 0.663371 | 1.042635 | 0.369866 | 0.303677 | 0.154106 | 0.057271 | 1.591291 |
| 50% | 78.000000 | 5.279000 | 5.370032 | 5.193152 | 1.064578 | 1.253918 | 0.606042 | 0.437454 | 0.231538 | 0.089848 | 1.832910 |
| 75% | 116.500000 | 6.101500 | 6.194600 | 6.006527 | 1.318027 | 1.414316 | 0.723008 | 0.516561 | 0.323762 | 0.153296 | 2.144654 |
| max | 155.000000 | 7.537000 | 7.622030 | 7.479556 | 1.870766 | 1.610574 | 0.949492 | 0.658249 | 0.838075 | 0.464308 | 3.117485 |
In [30]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric 2018 columns.
df18.describe()
Out[30]:
| Overall rank | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
|---|---|---|---|---|---|---|---|---|
| count | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 155.000000 |
| mean | 78.500000 | 5.375917 | 0.891449 | 1.213237 | 0.597346 | 0.454506 | 0.181006 | 0.112000 |
| std | 45.177428 | 1.119506 | 0.391921 | 0.302372 | 0.247579 | 0.162424 | 0.098471 | 0.096492 |
| min | 1.000000 | 2.905000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 39.750000 | 4.453750 | 0.616250 | 1.066750 | 0.422250 | 0.356000 | 0.109500 | 0.051000 |
| 50% | 78.500000 | 5.378000 | 0.949500 | 1.255000 | 0.644000 | 0.487000 | 0.174000 | 0.082000 |
| 75% | 117.250000 | 6.168500 | 1.197750 | 1.463000 | 0.777250 | 0.578500 | 0.239000 | 0.137000 |
| max | 156.000000 | 7.632000 | 2.096000 | 1.644000 | 1.030000 | 0.724000 | 0.598000 | 0.457000 |
In [33]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric 2019 columns.
df19.describe()
Out[33]:
| Overall rank | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
|---|---|---|---|---|---|---|---|---|
| count | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 | 156.000000 |
| mean | 78.500000 | 5.407096 | 0.905147 | 1.208814 | 0.725244 | 0.392571 | 0.184846 | 0.110603 |
| std | 45.177428 | 1.113120 | 0.398389 | 0.299191 | 0.242124 | 0.143289 | 0.095254 | 0.094538 |
| min | 1.000000 | 2.853000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 39.750000 | 4.544500 | 0.602750 | 1.055750 | 0.547750 | 0.308000 | 0.108750 | 0.047000 |
| 50% | 78.500000 | 5.379500 | 0.960000 | 1.271500 | 0.789000 | 0.417000 | 0.177500 | 0.085500 |
| 75% | 117.250000 | 6.184500 | 1.232500 | 1.452500 | 0.881750 | 0.507250 | 0.248250 | 0.141250 |
| max | 156.000000 | 7.769000 | 1.684000 | 1.624000 | 1.141000 | 0.631000 | 0.566000 | 0.453000 |
Null and duplicate values¶
In [65]:
# Number of missing values in each 2015 column.
df15.isnull().sum()
Out[65]:
Country 0 Region 0 Happiness Rank 0 Happiness Score 0 Standard Error 0 Economy (GDP per Capita) 0 Family 0 Health (Life Expectancy) 0 Freedom 0 Trust (Government Corruption) 0 Generosity 0 Dystopia Residual 0 dtype: int64
In [83]:
# Number of 2015 rows that exactly repeat an earlier row.
df15.duplicated().sum()
Out[83]:
0
In [36]:
# Number of missing values in each 2016 column.
df16.isnull().sum()
Out[36]:
Country 0 Region 0 Happiness Rank 0 Happiness Score 0 Lower Confidence Interval 0 Upper Confidence Interval 0 Economy (GDP per Capita) 0 Family 0 Health (Life Expectancy) 0 Freedom 0 Trust (Government Corruption) 0 Generosity 0 Dystopia Residual 0 dtype: int64
In [84]:
# Number of 2016 rows that exactly repeat an earlier row.
df16.duplicated().sum()
Out[84]:
0
In [94]:
# Number of missing values in each 2017 column.
df17.isnull().sum()
Out[94]:
Region 35 Country 0 Happiness.Rank 0 Happiness.Score 0 Whisker.high 0 Whisker.low 0 Economy..GDP.per.Capita. 0 Family 0 Health..Life.Expectancy. 0 Freedom 0 Generosity 0 Trust..Government.Corruption. 0 Dystopia.Residual 0 dtype: int64
In [96]:
# Number of 2017 rows that exactly repeat an earlier row.
df17.duplicated().sum()
Out[96]:
0
In [39]:
# Number of missing values in each 2018 column.
df18.isnull().sum()
Out[39]:
Overall rank 0 Country or region 0 Score 0 GDP per capita 0 Social support 0 Healthy life expectancy 0 Freedom to make life choices 0 Generosity 0 Perceptions of corruption 1 dtype: int64
In [44]:
# 'Perceptions of corruption' is a float64 column with a single NaN, so there is
# no 'N/A' string for replace() to match; fillna() is what actually fills the gap.
# NOTE(review): 0 is kept as the fill value, which equals the column minimum and
# pulls the mean down slightly — confirm this is the intended imputation.
df18['Perceptions of corruption'] = df18['Perceptions of corruption'].fillna(0)
In [86]:
# Number of 2018 rows that exactly repeat an earlier row.
df18.duplicated().sum()
Out[86]:
0
In [40]:
# Number of missing values in each 2019 column.
df19.isnull().sum()
Out[40]:
Overall rank 0 Country or region 0 Score 0 GDP per capita 0 Social support 0 Healthy life expectancy 0 Freedom to make life choices 0 Generosity 0 Perceptions of corruption 0 dtype: int64
In [81]:
# Number of 2019 rows that exactly repeat an earlier row.
df19.duplicated().sum()
Out[81]:
0
Histogram of Happiness Scores and features¶
In [154]:
# 15-bin histograms of every numeric 2015 column, arranged on a 2x5 grid.
numeric_2015 = df15.select_dtypes(include='number')
num_features = numeric_2015.columns
numeric_2015.hist(bins=15, figsize=(15, 6), layout=(2, 5))
Out[154]:
array([[<Axes: title={'center': 'Happiness Rank'}>,
<Axes: title={'center': 'Happiness Score'}>,
<Axes: title={'center': 'Standard Error'}>,
<Axes: title={'center': 'Economy (GDP per Capita)'}>,
<Axes: title={'center': 'Family'}>],
[<Axes: title={'center': 'Health (Life Expectancy)'}>,
<Axes: title={'center': 'Freedom'}>,
<Axes: title={'center': 'Trust (Government Corruption)'}>,
<Axes: title={'center': 'Generosity'}>,
<Axes: title={'center': 'Dystopia Residual'}>]], dtype=object)
In [52]:
# 15-bin histograms of every numeric 2016 column, arranged on a 3x4 grid.
numeric_2016 = df16.select_dtypes(include='number')
num_features16 = numeric_2016.columns
numeric_2016.hist(bins=15, figsize=(15, 6), layout=(3, 4))
Out[52]:
array([[<Axes: title={'center': 'Happiness Rank'}>,
<Axes: title={'center': 'Happiness Score'}>,
<Axes: title={'center': 'Lower Confidence Interval'}>,
<Axes: title={'center': 'Upper Confidence Interval'}>],
[<Axes: title={'center': 'Economy (GDP per Capita)'}>,
<Axes: title={'center': 'Family'}>,
<Axes: title={'center': 'Health (Life Expectancy)'}>,
<Axes: title={'center': 'Freedom'}>],
[<Axes: title={'center': 'Trust (Government Corruption)'}>,
<Axes: title={'center': 'Generosity'}>,
<Axes: title={'center': 'Dystopia Residual'}>, <Axes: >]],
dtype=object)
In [56]:
# 15-bin histograms of every numeric 2017 column, arranged on a 3x4 grid.
numeric_2017 = df17.select_dtypes(include='number')
num_features17 = numeric_2017.columns
numeric_2017.hist(bins=15, figsize=(15, 6), layout=(3, 4))
Out[56]:
array([[<Axes: title={'center': 'Happiness.Rank'}>,
<Axes: title={'center': 'Happiness.Score'}>,
<Axes: title={'center': 'Whisker.high'}>,
<Axes: title={'center': 'Whisker.low'}>],
[<Axes: title={'center': 'Economy..GDP.per.Capita.'}>,
<Axes: title={'center': 'Family'}>,
<Axes: title={'center': 'Health..Life.Expectancy.'}>,
<Axes: title={'center': 'Freedom'}>],
[<Axes: title={'center': 'Generosity'}>,
<Axes: title={'center': 'Trust..Government.Corruption.'}>,
<Axes: title={'center': 'Dystopia.Residual'}>, <Axes: >]],
dtype=object)
In [58]:
# 15-bin histograms of every numeric 2018 column, arranged on a 2x4 grid.
numeric_2018 = df18.select_dtypes(include='number')
num_features18 = numeric_2018.columns
numeric_2018.hist(bins=15, figsize=(15, 6), layout=(2, 4))
Out[58]:
array([[<Axes: title={'center': 'Overall rank'}>,
<Axes: title={'center': 'Score'}>,
<Axes: title={'center': 'GDP per capita'}>,
<Axes: title={'center': 'Social support'}>],
[<Axes: title={'center': 'Healthy life expectancy'}>,
<Axes: title={'center': 'Freedom to make life choices'}>,
<Axes: title={'center': 'Generosity'}>,
<Axes: title={'center': 'Perceptions of corruption'}>]],
dtype=object)
In [60]:
# 15-bin histograms of every numeric 2019 column, arranged on a 2x4 grid.
numeric_2019 = df19.select_dtypes(include='number')
num_features19 = numeric_2019.columns
numeric_2019.hist(bins=15, figsize=(15, 6), layout=(2, 4))
Out[60]:
array([[<Axes: title={'center': 'Overall rank'}>,
<Axes: title={'center': 'Score'}>,
<Axes: title={'center': 'GDP per capita'}>,
<Axes: title={'center': 'Social support'}>],
[<Axes: title={'center': 'Healthy life expectancy'}>,
<Axes: title={'center': 'Freedom to make life choices'}>,
<Axes: title={'center': 'Generosity'}>,
<Axes: title={'center': 'Perceptions of corruption'}>]],
dtype=object)
Scatter Plots¶
In [6]:
# Scatter plots of the 2015 happiness score against five of its factors:
# one figure per factor, points coloured by region.
# Each entry is (y-axis column in df15, short label used in the title and y-axis).
factors_2015 = [
    ('Economy (GDP per Capita)', 'GDP'),
    ('Family', 'Family'),
    ('Health (Life Expectancy)', 'Life Expectancy'),
    ('Freedom', 'Freedom'),
    ('Trust (Government Corruption)', 'Corruption'),
]

for factor_col, factor_label in factors_2015:
    # Explicit figure/axes so every call below targets this figure only.
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.scatterplot(data=df15, x='Happiness Score', y=factor_col, hue='Region', s=200, ax=ax)
    # Re-create the legend in the upper-left corner with a fixed font size.
    ax.legend(loc='upper left', fontsize=10)
    ax.set_title(f'Plot between Happiness score and {factor_label},2015')
    ax.set_xlabel('Happiness Score')
    ax.set_ylabel(factor_label)
    plt.show()
In [75]:
# 2016: happiness score vs. GDP per capita, points coloured by region.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(data=df16, x='Happiness Score', y='Economy (GDP per Capita)', hue='Region', s=200, ax=ax)
# Re-create the legend in the upper-left corner with a smaller font.
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2016')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
In [93]:
# 2017: happiness score vs. GDP per capita, points coloured by region.
# The 2017 columns use dotted names ('Happiness.Score', 'Economy..GDP.per.Capita.').
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(data=df17, x='Happiness.Score', y='Economy..GDP.per.Capita.', hue='Region', s=200, ax=ax)
# Re-create the legend in the upper-left corner with a smaller font.
ax.legend(loc='upper left', fontsize=8)
ax.set_title('Plot between Happiness score and GDP,2017')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
In [77]:
# 2018: happiness score vs. GDP per capita.
# The 2018 table has no region column, and colouring by 'Country or region'
# gives each of the 156 rows its own colour plus a 156-entry legend that covers
# the data, so the points are drawn in one colour without a legend.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(data=df18, x='Score', y='GDP per capita', s=200, ax=ax)
ax.set_title('Plot between Happiness score and GDP,2018')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
In [79]:
# 2019: happiness score vs. GDP per capita.
# The 2019 table has no region column, and colouring by 'Country or region'
# gives each of the 156 rows its own colour plus a 156-entry legend that covers
# the data, so the points are drawn in one colour without a legend.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(data=df19, x='Score', y='GDP per capita', s=200, ax=ax)
ax.set_title('Plot between Happiness score and GDP,2019')
ax.set_xlabel('Happiness Score')
ax.set_ylabel('GDP')
plt.show()
Correlation Map¶
In [50]:
def _plot_correlation_heatmap(df, year):
    """Draw a Pearson-correlation heatmap of the numeric columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One year's happiness table.
    year : int
        Year shown at the start of the figure title.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The numeric-only sub-frame and its Pearson correlation matrix.
    """
    numeric_df = df.select_dtypes(include=[np.number])
    corr = numeric_df.corr(method='pearson')
    fig, ax = plt.subplots(figsize=(12, 8))
    # Tick labels come from this matrix's own columns; there is no shared
    # global `cor`, and each year has a different set of column names.
    sns.heatmap(corr, cmap="Reds", xticklabels=corr.columns,
                yticklabels=corr.columns, square=False, ax=ax)
    ax.set_title(f'{year},Correlation Map')
    plt.show()
    return numeric_df, corr


# One heatmap per year; the per-year names are kept for any later cells that use them.
numeric_df15, cor15 = _plot_correlation_heatmap(df15, 2015)
numeric_df16, cor16 = _plot_correlation_heatmap(df16, 2016)
numeric_df17, cor17 = _plot_correlation_heatmap(df17, 2017)
numeric_df18, cor18 = _plot_correlation_heatmap(df18, 2018)
numeric_df19, cor19 = _plot_correlation_heatmap(df19, 2019)
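Darker red squares indicate a higher (more positive) correlation coefficient between the two variables, and lighter squares indicate a lower coefficient. Because the "Reds" colour scale is sequential, strongly negative correlations appear as the lightest squares, so a pale square does not necessarily mean a weak relationship¶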
Pair Plots¶
In [20]:
# Pair plot of the numeric 2015 columns, coloured by region.
# `hue` has to be a categorical column: with the continuous 'Happiness Score'
# every distinct score becomes its own hue level, and seaborn leaves the hue
# column out of the plotted variables, so the score itself would not be shown.
sns.pairplot(df15, hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2015', y=1.02)  # y > 1 places the title above the grid
plt.show()
In [19]:
# Pair plot of the numeric 2016 columns, coloured by region.
# `hue` has to be a categorical column: with the continuous 'Happiness Score'
# every distinct score becomes its own hue level, and seaborn leaves the hue
# column out of the plotted variables, so the score itself would not be shown.
sns.pairplot(df16, hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2016', y=1.02)  # y > 1 places the title above the grid
plt.show()
In [21]:
# Pair plot of the numeric 2017 columns, coloured by region.
# `hue` has to be a categorical column: with the continuous 'Happiness.Score'
# every distinct score becomes its own hue level, and seaborn leaves the hue
# column out of the plotted variables, so the score itself would not be shown.
# NOTE(review): the null-count cell reports 35 rows of df17 without a 'Region';
# confirm how those rows should be treated in this plot.
sns.pairplot(df17, hue='Region')
plt.suptitle('Pair Plot of Happiness Score,2017', y=1.02)  # y > 1 places the title above the grid
plt.show()
Facet Grids¶
In [ ]:
# One small scatter panel per region: 2015 happiness score vs. GDP per capita.
# Facets need a categorical column; faceting on the continuous score and GDP
# columns would request one panel per distinct (score, GDP) value pair.
g = sns.FacetGrid(df15, col='Region', col_wrap=4, height=3)
g.map_dataframe(sns.scatterplot, x="Happiness Score", y="Economy (GDP per Capita)")
g.set_axis_labels("Happiness Score", "GDP")
g.set_titles(col_template="{col_name}")  # panel title = region name only
plt.suptitle('2015', y=1.02)  # overall title, placed just above the grid
plt.show()
Choropleth map¶
In [97]:
pip install plotly
Requirement already satisfied: plotly in c:\users\san\anaconda3\lib\site-packages (5.22.0)Note: you may need to restart the kernel to use updated packages. Requirement already satisfied: tenacity>=6.2.0 in c:\users\san\anaconda3\lib\site-packages (from plotly) (8.2.2) Requirement already satisfied: packaging in c:\users\san\anaconda3\lib\site-packages (from plotly) (23.2)
In [114]:
import plotly.express as px

# World map shaded by the 2019 happiness score.
fig = px.choropleth(
    data_frame=df19,
    locations='Country or region',
    locationmode='country names',  # match rows to map shapes by country name
    color='Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2019',
    # The 2019 score column is named 'Score'; the label key must match that
    # column for the colour bar and hover text to read 'Happiness Score'.
    labels={'Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Score']
)
# Show the plot
fig.show()
In [129]:
# World map shaded by the 2015 happiness score; rows are matched to map shapes
# by the country name in the 'Country' column.
map_options_2015 = dict(
    locations='Country',
    locationmode='country names',
    color='Happiness Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2015',
    labels={'Happiness Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Happiness Score'],
)
fig = px.choropleth(df15, **map_options_2015)
# Render the interactive figure.
fig.show()
In [131]:
# World map shaded by the 2016 happiness score; rows are matched to map shapes
# by the country name in the 'Country' column.
map_options_2016 = dict(
    locations='Country',
    locationmode='country names',
    color='Happiness Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2016',
    labels={'Happiness Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Happiness Score'],
)
fig = px.choropleth(df16, **map_options_2016)
# Render the interactive figure.
fig.show()
In [133]:
# World map shaded by the 2017 happiness score.
fig = px.choropleth(
    df17,
    locations='Country',
    locationmode='country names',  # match rows to map shapes by country name
    color='Happiness.Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2017',
    # The 2017 score column is named 'Happiness.Score'; the label key must match
    # that column for the colour bar and hover text to read 'Happiness Score'.
    labels={'Happiness.Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Happiness.Score']
)
# Show the plot
fig.show()
In [135]:
# World map shaded by the 2018 happiness score.
fig = px.choropleth(
    df18,
    locations='Country or region',
    locationmode='country names',  # match rows to map shapes by country name
    color='Score',
    color_continuous_scale='Viridis',
    title='Global Happiness Scores,2018',
    # The 2018 score column is named 'Score'; the label key must match that
    # column for the colour bar and hover text to read 'Happiness Score'.
    labels={'Score': 'Happiness Score'},
    projection='natural earth',
    hover_data=['Score']
)
# Show the plot
fig.show()
Happiness score in different Regions¶
In [11]:
# Mean 2015 happiness score per region, as a one-column DataFrame indexed by region.
Happiness15 = df15.groupby('Region').agg({'Happiness Score': 'mean'})
Happiness15
Out[11]:
| Happiness Score | |
|---|---|
| Region | |
| Australia and New Zealand | 7.285000 |
| Central and Eastern Europe | 5.332931 |
| Eastern Asia | 5.626167 |
| Latin America and Caribbean | 6.144682 |
| Middle East and Northern Africa | 5.406900 |
| North America | 7.273000 |
| Southeastern Asia | 5.317444 |
| Southern Asia | 4.580857 |
| Sub-Saharan Africa | 4.202800 |
| Western Europe | 6.689619 |
In [12]:
# Mean 2016 happiness score per region, as a one-column DataFrame indexed by region.
Happiness16 = df16.groupby('Region').agg({'Happiness Score': 'mean'})
Happiness16
Out[12]:
| Happiness Score | |
|---|---|
| Region | |
| Australia and New Zealand | 7.323500 |
| Central and Eastern Europe | 5.370690 |
| Eastern Asia | 5.624167 |
| Latin America and Caribbean | 6.101750 |
| Middle East and Northern Africa | 5.386053 |
| North America | 7.254000 |
| Southeastern Asia | 5.338889 |
| Southern Asia | 4.563286 |
| Sub-Saharan Africa | 4.136421 |
| Western Europe | 6.685667 |
In [13]:
# Mean 2017 happiness score per region, as a one-column DataFrame indexed by region.
Happiness17 = df17.groupby('Region').agg({'Happiness.Score': 'mean'})
Happiness17
Out[13]:
| Happiness.Score | |
|---|---|
| Region | |
| Australia and New Zealand | 7.299000 |
| Central and Eastern Europe | 5.481864 |
| Eastern Asia | 5.863250 |
| Latin America and Caribbean | 5.980895 |
| Middle East and Northern Africa | 5.459133 |
| North America | 7.154500 |
| Southeastern Asia | 5.807667 |
| Southern Asia | 4.628429 |
| Sub-Saharan Africa | 4.102286 |
| Western Europe | 7.095600 |
In [14]:
# One pie chart per year of the regional mean happiness scores.
# NOTE(review): each slice is a region's mean score as a share of the sum of
# all regional means, which is not a meaningful "part of a whole"; a bar chart
# may communicate the comparison better.
yearly_regional_means = (
    (2015, Happiness15),
    (2016, Happiness16),
    (2017, Happiness17),
)
for year, regional_means in yearly_regional_means:
    regional_means.plot(kind='pie', autopct='%1.1f%%', figsize=(10, 6), legend=False, subplots=True)
    plt.title(f'Happiness Score {year}')
    plt.ylabel('')  # hide the column name that pandas puts on the y-axis
    plt.show()
In [ ]: